package cn.jc.pachong.service;

import java.io.IOException;
import java.util.ArrayList;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Extracts URLs from the textual content of a page.
 *
 * <p>Two kinds of URL are recognised:
 * <ul>
 *   <li>the value of the {@code href} attribute of an anchor tag, which may
 *       be relative, e.g. {@code /android/yinyueshipin};</li>
 *   <li>an absolute URL of the form {@code scheme://...} appearing anywhere
 *       in the text, e.g. {@code http://example.com/page}.</li>
 * </ul>
 *
 * <p>Values are returned as written in the content: relative links are not
 * resolved against a base URL and HTML entities are not decoded.
 */
public class URLFinder {

	/**
	 * Matches either an anchor tag up to and including its href value
	 * (group 1: double-quoted, group 2: single-quoted, group 3: unquoted)
	 * or a bare absolute URL (group 4). Compiled once because Pattern
	 * instances are immutable and safe to share.
	 */
	private static final Pattern URL_PATTERN = Pattern.compile(
			"<a\\b[^>]*?\\shref\\s*=\\s*(?:\"([^\"]*)\"|'([^']*)'|([^\\s\"'>]+))"
			+ "|([a-zA-Z][a-zA-Z0-9+.\\-]*://[^\\s\"'<>]+)",
			Pattern.CASE_INSENSITIVE);

	/**
	 * Collects the URLs found in the given page content.
	 *
	 * @param content the page content to scan; may be {@code null}
	 * @return a new, modifiable list of the distinct URLs in order of first
	 *         appearance; empty (never {@code null}) when {@code content} is
	 *         {@code null} or contains no URL
	 */
	public static List<String> findUrls (String content){
		// LinkedHashSet drops repeated links while keeping document order.
		Set<String> found = new LinkedHashSet<String>();
		if (content != null) {
			Matcher matcher = URL_PATTERN.matcher(content);
			while (matcher.find()) {
				String url = firstMatchedGroup(matcher);
				if (url != null) {
					url = url.trim();
					// An empty href carries no link target, so skip it.
					if (url.length() > 0) {
						found.add(url);
					}
				}
			}
		}
		return new ArrayList<String>(found);
	}

	/**
	 * Prints every URL found in the given content to standard output, one
	 * per line, using the same extraction rules as {@link #findUrls(String)}.
	 *
	 * @param content the page content to scan; may be {@code null}
	 * @throws IOException never thrown by this implementation; the clause is
	 *         retained so that existing callers which catch it still compile
	 */
	public static void getUrl(String content) throws IOException {
		for (String url : findUrls(content)) {
			System.out.println(url);
		}
	}

	/** Returns the text of the first capturing group that took part in the match, or null. */
	private static String firstMatchedGroup(Matcher matcher) {
		for (int group = 1; group <= matcher.groupCount(); group++) {
			String value = matcher.group(group);
			if (value != null) {
				return value;
			}
		}
		return null;
	}
}
